{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# !pip3 install einops==0.3.0\n",
    "# !pip3 install tensorflow==1.15.5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "SOURCE_DIR = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__name__))))\n",
    "sys.path.insert(0, SOURCE_DIR)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "from malaya.train.model.fastformer.fast_transformer import FastTransformer\n",
    "\n",
    "# tf.compat.v1.enable_eager_execution()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# pos_emb = RotaryEmbedding(dim = 32)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# x = tf.placeholder(tf.int32, [None])\n",
    "# x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# freqs = pos_emb(x, cache_key = 1024)\n",
    "# freqs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# q = tf.random.normal((1, 1024, 64))\n",
    "# k = tf.random.normal((1, 1024, 64))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# freqs = freqs[None, ...]\n",
    "# freqs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# q = apply_rotary_emb(freqs, q)\n",
    "# k = apply_rotary_emb(freqs, k)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sess = tf.Session()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# sess.run(k, feed_dict = {x: np.arange(1024)})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<tf.Tensor 'NotEqual:0' shape=(?, ?) dtype=bool>"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = tf.placeholder(tf.int32, [None, None])\n",
    "mask = tf.math.not_equal(x, 0)\n",
    "mask = tf.cast(mask, tf.bool)\n",
    "mask"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# mask = tf.ones((1, 4096))\n",
    "# x = tf.convert_to_tensor(np.random.randint(0, 20000, (1, 4096), dtype = np.int32))\n",
    "# mask = tf.cast(mask, tf.bool)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/keras/initializers.py:119: calling RandomUniform.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Call initializer instance with the dtype argument instead of passing it to the constructor\n",
      "WARNING:tensorflow:From /Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "If using Keras pass *_constraint arguments to layers.\n"
     ]
    }
   ],
   "source": [
    "model = FastTransformer(\n",
    "    num_tokens = 20000,\n",
    "    dim = 512,\n",
    "    depth = 2,\n",
    "    max_seq_len = 4096,\n",
    "    absolute_pos_emb = True,\n",
    "    mask = mask\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /Users/huseinzolkepli/Documents/malaya/malaya/train/model/fastformer/fast_attention.py:87: where (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use tf.where in 2.0, which has the same broadcast rule as np.where\n",
      "Tensor(\"fast_transformer/pre_norm/fast_attention/Select:0\", shape=(?, 8, ?), dtype=float32)\n",
      "Tensor(\"fast_transformer/pre_norm/fast_attention/Select:0\", shape=(?, 8, ?), dtype=float32)\n",
      "Tensor(\"fast_transformer/pre_norm_2/fast_attention_1/Select:0\", shape=(?, 8, ?), dtype=float32)\n",
      "Tensor(\"fast_transformer/pre_norm_2/fast_attention_1/Select:0\", shape=(?, 8, ?), dtype=float32)\n"
     ]
    }
   ],
   "source": [
    "logits = model(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(<tf.Tensor 'fast_transformer/add_4:0' shape=(?, ?, 512) dtype=float32>,\n",
       " <tf.Tensor 'fast_transformer/sequential/dense_14/Tanh:0' shape=(?, 512) dtype=float32>)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "logits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "sess = tf.Session()\n",
    "sess.run(tf.global_variables_initializer())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 291 ms, sys: 39.2 ms, total: 330 ms\n",
      "Wall time: 320 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "o = sess.run(logits, feed_dict = {x: np.random.randint(0, 20000, (1, 128), dtype = np.int32)})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "((1, 128, 512), (1, 512))"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "o[0].shape, o[1].shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# sess.run(logits, feed_dict = {x: np.random.randint(0, 20000, (1, 1000), dtype = np.int32)}).shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "# model.token_emb._trainable_weights[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "# tf.matmul(logits[0], model.token_emb._trainable_weights[0],transpose_b=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
